library(dataRetrieval) #read in necessary libraries
library(plotly)
library(dplyr)
library(MASS)
library(knitr)
library(boot)
library(ggplot2)
library(data.table)
library(formattable)
library(tidyr)
TSS_Caz <- read.csv('~/AWQP/TSS.csv') %>%
dplyr::select(TSS..mg.L., Name, TSS) #read in data for each persons TSS Standards
mean_Caz = mean(TSS_Caz$TSS..mg.L.)
RMSE_Caz <- sqrt(mean((TSS_Caz$TSS - TSS_Caz$TSS..mg.L.)^2)) #find RSME
sd_Caz <- sd(TSS_Caz$TSS..mg.L., na.rm = FALSE) #find standard deviation
cv_Caz <- sd(TSS_Caz$TSS..mg.L.) / mean(TSS_Caz$TSS..mg.L.) * 100 #find cv
TSS_Justin <- read.csv('~/AWQP/TSS_Justin.csv') %>%
dplyr::select(TSS..mg.L., Name, TSS)
mean_Justin = mean(TSS_Justin$TSS..mg.L.)
RMSE_Justin <- sqrt(mean((TSS_Justin$TSS - TSS_Justin$TSS..mg.L.)^2)) #find RSME
sd_Justin <- sd(TSS_Justin$TSS..mg.L., na.rm = FALSE) #find standard deviation
cv_Justin <- sd(TSS_Justin$TSS..mg.L.) / mean(TSS_Justin$TSS..mg.L.) * 100 #find cv
TSS_Melina <- read.csv('~/AWQP/TSS_Melina.csv') %>%
dplyr::select(TSS..mg.L., Name, TSS)
mean_Mel = mean(TSS_Melina$TSS..mg.L.)
RMSE_Mel <- sqrt(mean((TSS_Melina$TSS - TSS_Melina$TSS..mg.L.)^2)) #find RSME
sd_Mel <- sd(TSS_Melina$TSS..mg.L., na.rm = FALSE) #find standard deviation
cv_Mel <- sd(TSS_Melina$TSS..mg.L.) / mean(TSS_Melina$TSS..mg.L.) * 100
TSS_Mia <- read.csv('~/AWQP/TSS_Mia_.csv') %>%
dplyr::select(TSS..mg.L., Name, TSS) %>%
na.omit()
mean_Mia = mean(TSS_Mia$TSS..mg.L.)
RMSE_Mia <- sqrt(mean((TSS_Mia$TSS - TSS_Mia$TSS..mg.L.)^2)) #find RSME
sd_Mia <- sd(TSS_Mia$TSS..mg.L., na.rm = FALSE) #find standard deviation
cv_Mia <- sd(TSS_Mia$TSS..mg.L.) / mean(TSS_Mia$TSS..mg.L.) * 100
TSS_Morgan <- read.csv('~/AWQP/Morgan_TSS.csv') %>%
dplyr::select(TSS..mg.L., Name, TSS) %>%
na.omit()
mean_Morgan = mean(TSS_Morgan$TSS..mg.L.)
RMSE_Morgan <- sqrt(mean((TSS_Morgan$TSS - TSS_Morgan$TSS..mg.L.)^2)) #find RSME
sd_Morgan <- sd(TSS_Morgan$TSS..mg.L., na.rm = FALSE) #find standard deviation
cv_Morgan <- sd(TSS_Morgan$TSS..mg.L.) / mean(TSS_Morgan$TSS..mg.L.) * 100
TSS_bind1 <- rbind(TSS_Caz, TSS_Justin, TSS_Melina, TSS_Mia, TSS_Morgan) %>% #combine individual data sets
na.omit() %>%
dplyr::select(TSS..mg.L., Name)
TSS <- TSS_bind1 %>%
group_by(Name) %>%
mutate(row = row_number()) %>%
tidyr::pivot_wider(names_from = Name, values_from = TSS..mg.L.) %>%
dplyr::select(-row)
tab_stats <- matrix(c(mean_Caz,sd_Caz, RMSE_Caz, cv_Caz, mean_Justin, sd_Justin, RMSE_Justin, cv_Justin, mean_Mel, sd_Mel, RMSE_Mel, cv_Mel, mean_Mia,sd_Mia, RMSE_Mia, cv_Mia, mean_Morgan,sd_Morgan, RMSE_Morgan, cv_Morgan), ncol=4, byrow=TRUE) #create matrix for table
colnames(tab_stats) <- c('Mean(ppm)','Standard Deviation(ppm)','Root Mean Square Error(ppm)', 'Coefficient of Variation(%)') #label columns
rownames(tab_stats) <- c('Caz','Justin','Melina', 'Mia', 'Morgan') #label rows
table_stats <- as.table(tab_stats)
formattable(table_stats) %>%
kable(caption = "Summary Statistics") #format table
Summary Statistics
| Caz |
57.27273 |
3.595733 |
8.040303 |
6.278263 |
| Justin |
65.33333 |
8.344437 |
17.256239 |
12.772098 |
| Melina |
56.29630 |
5.386311 |
8.089011 |
9.567789 |
| Mia |
60.37037 |
7.895928 |
12.765695 |
13.079145 |
| Morgan |
43.03030 |
6.403913 |
9.265991 |
14.882333 |
fig <- plot_ly(TSS_bind1, x = ~Name, y = ~TSS..mg.L., color = ~Name, colors = c("red", "blue", "purple", 'green4', 'orange2')) #select values for plotly, assign colors
fig <- fig %>% add_markers(marker = list(line = list(color = ~Name, width = 1)))
fig <- fig %>% layout(
title = "TSS Standards", #title
xaxis = list(domain = c(0.1, 1)),
yaxis = list(title = "TSS (mg/L)"),
updatemenus = list(
list(
y = 0.8,
buttons = list(
list(method = "restyle", #set up box plot
args = list("type", "box"),
label = "Box plot"
),
list(method = "restyle", #set up violin plot
args = list("type", "violin"),
label = "Violin"
)))
))
fig
cols <- c("red", "blue", "purple", 'green4', 'orange2') #attach colors for each person
# Basic density plot in ggplot2
ggplot( TSS_bind1, aes(x = TSS..mg.L., colour = Name)) + #sort colors for names
geom_density(lwd = 1.2, linetype = 1) +
scale_color_manual(values = cols) + #use previous set colors
labs(x = 'TSS mg/L', y = 'Density',
title = 'TSS Kernel Density Estimation') #label the graph

mean.function <- function(x, index) {
d <- x[index] # This first line will go in ever bootstrap function you make.
return(mean(d))
}
set.seed(50)
BootDist_C <- boot(data = TSS_Caz$TSS..mg.L., mean.function, R=10000)
low_C <- quantile( BootDist_C$t, probs=(.025))
upper_C <- quantile( BootDist_C$t, probs= (.975))
TSS_Morgan <- read.csv('~/AWQP/Morgan_TSS.csv') %>%
dplyr::select(TSS..mg.L., Name, TSS) %>%
na.omit()
mean_Morgan = mean(TSS_Morgan$TSS..mg.L.)
RMSE_Morgan <- sqrt(mean((TSS_Morgan$TSS - TSS_Morgan$TSS..mg.L.)^2)) #find RSME
sd_Morgan <- sd(TSS_Morgan$TSS..mg.L., na.rm = FALSE) #find standard deviation
cv_Morgan <- sd(TSS_Morgan$TSS..mg.L.) / mean(TSS_Morgan$TSS..mg.L.) * 100
BootDist_Morgan <- boot(data = TSS_Morgan$TSS..mg.L., statistic = mean.function, R=10000)
set.seed(50)
low_Morg <- quantile( BootDist_Morgan$t, probs=(.025))
upper_Morg <- quantile( BootDist_Morgan$t, probs= (.975))
mean.function <- function(x, index) {
d <- x[index] # This first line will go in ever bootstrap function you make.
return(mean(d))
}
set.seed(50)
BootDist_C1 <- boot(data = TSS_Caz$TSS..mg.L., mean.function, R=10000)
low_C1 <- quantile( BootDist_C$t, probs=(.005))
upper_C1 <- quantile( BootDist_C$t, probs= (.995))
BootDist_Morgan1 <- boot(data = TSS_Morgan$TSS..mg.L., statistic = mean.function, R=10000)
set.seed(50)
low_Morg1 <- quantile( BootDist_Morgan$t, probs=(.005))
upper_Morg1 <- quantile( BootDist_Morgan$t, probs= (.995))
Confidence Intervals
| Caz |
57.27273 |
55.15152 |
59.39394 |
54.84848 |
60.00000 |
| Justin |
65.33333 |
60.66667 |
70.33333 |
59.33333 |
71.66667 |
| Melina |
56.29630 |
53.33333 |
59.62963 |
52.22222 |
60.74074 |
| Mia |
60.37037 |
55.55556 |
65.18519 |
54.07407 |
66.66667 |
| Morgan |
43.03030 |
39.69697 |
46.66667 |
38.78788 |
48.18182 |
```